Line Graphs

Making a Basic Line Graph

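  1. Force the y-axis to start at zero by setting its limits explicitly: ggplot(BOD, aes(x=Time, y=demand)) + geom_line() + ylim(0, max(BOD$demand))

  2. Or expand the automatically chosen limits so that they include zero: ggplot(BOD, aes(x=Time, y=demand)) + geom_line() + expand_limits(y=0)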

Adding Points to a Line Graph

Example

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
data(worldpop)
glimpse(worldpop)
Observations: 58
Variables: 2
$ Year       <int> -10000, -9000, -8000, -7000, -6000, -5000, -4000, -3000, -…
$ Population <int> 2431, 3564, 5136, 7562, 11461, 17920, 28370, 44820, 72108,…
# World population over time: a line with a marker at every observation
g <- ggplot(worldpop, aes(x = Year, y = Population)) +
  geom_line() +
  geom_point()
ggplotly(g)
# Same layers again, this time with the y-axis on a log10 scale
g1 <- ggplot(worldpop, aes(x = Year, y = Population)) +
  geom_line() +
  geom_point() +
  scale_y_log10()
ggplotly(g1)
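  • Year is an integer, so in the next example we convert it to a factor. With a factor on the x-axis, group=1 must be added so that geom_line() connects all the points as a single line.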
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
data(worldpop)


# Convert Year to a factor *inside the data frame* and introduce group=1.
# aes(x=Year) is looked up in worldpop first, so a free-standing `Year`
# variable would be ignored and the x-axis would stay numeric.
worldpop$Year <- as.factor(worldpop$Year)
# group=1 tells geom_line() to join all factor levels into one line
g <- ggplot(worldpop, aes(x = Year, y = Population, group = 1)) +
  geom_line(color = "red") +
  geom_point() +
  xlab("Year") +
  ylab("Population") +
  ggtitle("Line Graph of World Population")

ggplotly(g)
# Same with a log y-axis
g1 <- ggplot(worldpop, aes(x = Year, y = Population, group = 1)) +
  geom_line(color = "blue") +
  geom_point() +
  scale_y_log10()
ggplotly(g1)

Making a Line Graph with Multiple Lines

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)
str(ToothGrowth)
'data.frame':   60 obs. of  3 variables:
 $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
 $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
 $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Load plyr so we can use ddply() to create the example data set
library(plyr)

# Mean tooth length for every supplement/dose combination
tg <- ddply(
  ToothGrowth,
  c("supp", "dose"),
  summarise,
  length = mean(len)
)

# One line per supplement, distinguished by colour
g1 <- ggplot(tg, aes(x = dose, y = length, colour = supp)) +
  geom_line()
ggplotly(g1)
# One line per supplement, distinguished by line type
g2 <- ggplot(tg, aes(x = dose, y = length, linetype = supp)) +
  geom_line()
ggplotly(g2)
# Static side-by-side view of both versions
grid.arrange(g1, g2, ncol = 2)

Converting a Continuous x Variable to a Factor and Introducing a Group

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)

# Load plyr so we can use ddply() to create the example data set
library(plyr)

# Average len within each supp/dose cell
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise,
            length = mean(len))

# dose is treated as categorical, so supp is mapped to group explicitly
# to get one line per supplement
g <- ggplot(
  tg,
  aes(x = factor(dose), y = length, colour = supp, group = supp)
) +
  geom_line()
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)

# Load plyr so we can use ddply() to create the example data set
library(plyr)

# Summarize the ToothGrowth data
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise, length=mean(len))

# No grouping variable is mapped here: tg has one row per supp/dose
# combination, so geom_line() joins the rows of both supplements into a
# single path. Compare with the versions that map supp to colour, linetype
# or group.
ggplot(tg, aes(x=dose, y=length)) + geom_line()

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)

# Load plyr so we can use ddply() to create the example data set
library(plyr)

# Per-group mean tooth length (one row per supp/dose pair)
tg <- ddply(
  ToothGrowth, c("supp", "dose"), summarise,
  length = mean(len)
)

# Supplement encoded by point shape; points enlarged for visibility
g1 <- ggplot(tg, aes(x = dose, y = length, shape = supp)) +
  geom_line() +
  geom_point(size = 4)
ggplotly(g1)
# Supplement encoded by fill colour; shape 21 is a circle that takes a fill
g2 <- ggplot(tg, aes(x = dose, y = length, fill = supp)) +
  geom_line() +
  geom_point(size = 4, shape = 21)
ggplotly(g2)
# Lay both plots out next to each other with gridExtra
grid.arrange(g1, g2, ncol = 2)

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)

# Load plyr so we can use ddply() to create the example data set
library(plyr)

# Mean length by supplement and dose
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise,
            length = mean(len))

# Shift lines and points sideways by the same amount (0.2) so that
# overlapping markers of the two supplements stay visible
g <- ggplot(tg, aes(x = dose, y = length, shape = supp)) +
  geom_line(position = position_dodge(0.2)) +
  geom_point(position = position_dodge(0.2), size = 4)
ggplotly(g)

Changing the Appearance of Lines

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(BOD)
str(BOD)
'data.frame':   6 obs. of  2 variables:
 $ Time  : num  1 2 3 4 5 7
 $ demand: num  8.3 10.3 19 16 15.6 19.8
 - attr(*, "reference")= chr "A1.4, p. 270"
# Dashed blue line with Time as a continuous x-axis
g1 <- ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line(linetype = "dashed", size = 1, colour = "blue")
ggplotly(g1)
# Same styling with Time as a factor; group=1 keeps the points connected
g2 <- ggplot(BOD, aes(x = factor(Time), y = demand, group = 1)) +
  geom_line(linetype = "dashed", size = 1, colour = "blue")
ggplotly(g2)
# Show both variants side by side
grid.arrange(g1, g2, ncol = 2)

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)

# Load plyr so we can use ddply() to create the example data set
library(plyr)

# Collapse ToothGrowth to the mean length per supp/dose
tg <- ddply(
  ToothGrowth,
  c("supp", "dose"),
  summarise,
  length = mean(len)
)

# Colour the lines by supplement using the ColorBrewer "Set1" palette
g <- ggplot(tg, aes(x = dose, y = length, colour = supp)) +
  geom_line() +
  scale_colour_brewer(palette = "Set1")
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)

# Load plyr so we can use ddply() to create the example data set
library(plyr)

# Mean tooth length per supplement and dose
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise,
            length = mean(len))

# Both lines share one fixed appearance, so no aesthetic implies a grouping;
# supp has to be mapped to group explicitly
g1 <- ggplot(tg, aes(x = dose, y = length, group = supp)) +
  geom_line(colour = "darkgreen", size = 1.5)
ggplotly(g1)
# Mapping supp to colour groups the data automatically
g2 <- ggplot(tg, aes(x = dose, y = length, colour = supp)) +
  geom_line(linetype = "dashed") +
  geom_point(shape = 22, size = 3, fill = "white")
ggplotly(g2)
# Arrange the two plots in one row with gridExtra
grid.arrange(g1, g2, ncol = 2)

Changing the Appearance of Points

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(BOD)

# Large square markers (shape 22) with a dark red outline and pink fill
g <- ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line() +
  geom_point(size = 4, shape = 22, colour = "darkred", fill = "pink")
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(BOD)

# Circle markers (shape 21) filled white, drawn on top of the line
g <- ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line() +
  geom_point(size = 4, shape = 21, fill = "white")
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(ToothGrowth)

# Load plyr so we can use ddply() to create the example data set
library(plyr)
# Mean length for each supp/dose combination
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise,
            length = mean(len))
# One shared dodge specification, reused by the line and point layers
pd <- position_dodge(0.2)

# Filled circles: black for the first supp level, white for the second
g <- ggplot(tg, aes(x = dose, y = length, fill = supp)) +
  geom_line(position = pd) +
  geom_point(shape = 21, size = 3, position = pd) +
  scale_fill_manual(values = c("black", "white"))
ggplotly(g)

Making a Graph with a Shaded Area

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(sunspotyear)

# Turn the sunspot.year time series into a two-column data frame:
# the observation times become Year, the series values become Sunspots
sunspotyear <- data.frame(
  Year     = as.numeric(time(sunspot.year)),
  Sunspots = as.numeric(sunspot.year)
)
# Default filled area under the series
g <- ggplot(sunspotyear, aes(x = Year, y = Sunspots)) +
  geom_area()
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(sunspotyear)

# Rebuild the example data frame from the sunspot.year time series
sunspotyear <- data.frame(
  Year     = as.numeric(time(sunspot.year)),
  Sunspots = as.numeric(sunspot.year)
)
# Semi-transparent blue area with a black outline
g <- ggplot(sunspotyear, aes(x = Year, y = Sunspots)) +
  geom_area(colour = "black", fill = "blue", alpha = .2)
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(sunspotyear)

# Data frame version of sunspot.year (times as Year, values as Sunspots)
sunspotyear <- data.frame(
  Year     = as.numeric(time(sunspot.year)),
  Sunspots = as.numeric(sunspot.year)
)

# Area without an outline, plus a separate line layer along the data
g <- ggplot(sunspotyear, aes(x = Year, y = Sunspots)) +
  geom_area(fill = "blue", alpha = .2) +
  geom_line()
ggplotly(g)

Making a Stacked Area Graph

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(uspopage)

# Stacked area chart: one filled band per age group
g <- ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area()
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(uspopage)

# Thin black outlines, translucent "Blues" fills, and legend entries listed
# in reverse factor-level order
g <- ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(colour = "black", size = .2, alpha = .4) +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  )
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(uspopage)

library(plyr)
# Reverse the stacking order of the age groups.
# The `order` aesthetic (order=desc(AgeGroup)) was deprecated in ggplot2 2.0.0
# and is ignored by current releases, so the reversal is requested from the
# stacking position adjustment instead.
g <- ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(position = position_stack(reverse = TRUE),
            colour = "black", size = .2, alpha = .4) +
  scale_fill_brewer(palette = "Blues")
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(uspopage)

# Reversed stacking order with no outline on the areas; a stacked line layer
# draws only the upper edge of each band.
# The `order` aesthetic is ignored by ggplot2 >= 2.0.0, so both layers get the
# same reversed stacking position to keep lines and areas aligned.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(position = position_stack(reverse = TRUE),
            colour = NA, alpha = .4) +
  scale_fill_brewer(palette = "Blues") +
  geom_line(position = position_stack(reverse = TRUE), size = .2)

Making a Proportional Stacked Area Graph

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
data(uspopage)

library(plyr) # For the ddply() function
# Within each Year, express every age group's count as a percentage of
# that year's total
uspopage_prop <- ddply(
  uspopage, "Year", transform,
  Percent = Thousands / sum(Thousands) * 100
)

# Proportional stacked area chart of the percentages
g <- ggplot(uspopage_prop, aes(x = Year, y = Percent, fill = AgeGroup)) +
  geom_area(colour = "black", size = .2, alpha = .4) +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  )
ggplotly(g)
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
data(uspopage)

library(plyr) # For the ddply() function
# Add a Percent column: each row's share of its Year's total
uspopage_prop <- ddply(
  uspopage, "Year", transform,
  Percent = Thousands / sum(Thousands) * 100
)
# Browse the result as an interactive table, 10 rows per page
DT::datatable(uspopage_prop, options = list(pageLength = 10))

Adding a Confidence Region

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
data(climate)

# Keep only the Berkeley rows and the three columns used below
clim <- subset(
  climate,
  Source == "Berkeley",
  select = c("Year", "Anomaly10y", "Unc10y")
)

# Shaded band from (anomaly - uncertainty) to (anomaly + uncertainty),
# with the anomaly line added as a later layer
gg <- ggplot(clim, aes(x = Year, y = Anomaly10y)) +
  geom_ribbon(
    aes(ymin = Anomaly10y - Unc10y, ymax = Anomaly10y + Unc10y),
    alpha = 0.2
  ) +
  geom_line()
ggplotly(gg)

Note that geom_ribbon() is added before geom_line(), so that the line is drawn on top of the shaded region. If the reverse order were used, the shaded region could obscure the line. In this particular case that wouldn’t be a problem, since the shaded region is mostly transparent, but it would be a problem if the shaded region were opaque.

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
data(climate)

# Uncertainty shown as dotted grey lines below and above the anomaly line
# (clim is the Berkeley subset created in the previous example)
g <- ggplot(clim, aes(x = Year, y = Anomaly10y)) +
  geom_line(aes(y = Anomaly10y - Unc10y),
            colour = "grey50", linetype = "dotted") +
  geom_line(aes(y = Anomaly10y + Unc10y),
            colour = "grey50", linetype = "dotted") +
  geom_line()
ggplotly(g)

Summarizing Data by Groups

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Mean head weight and mean vitamin C for every cultivar/date combination
summ_cab <- ddply(
  cabbages, c("Cult", "Date"), summarise,
  Weight = mean(HeadWt),
  VitC = mean(VitC)
)

# Interactive table of the summary, 10 rows per page
DT::datatable(summ_cab, options = list(pageLength = 10))
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Without any grouping, summarise() collapses the whole data frame into a
# single row holding the overall mean head weight
summarise(cabbages, Weight = mean(HeadWt))
    Weight
1 2.593333
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# ddply() splits cabbages by Cult, applies summarise() to each piece and
# binds the per-group rows back together
ddply(cabbages, "Cult", summarise, Weight = mean(HeadWt))
  Cult   Weight
1  c39 2.906667
2  c52 2.280000
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Group by cultivar and date; report the mean of HeadWt and of VitC
summ_cab <- ddply(cabbages, c("Cult", "Date"), summarise,
                  Weight = mean(HeadWt),
                  VitC = mean(VitC))

# Show the grouped means in a paged, interactive table
DT::datatable(summ_cab, options = list(pageLength = 10))
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Per cultivar/date: mean head weight, its standard deviation and the
# number of observations
summ_cab <- ddply(
  cabbages, c("Cult", "Date"), summarise,
  Weight = mean(HeadWt),
  sd = sd(HeadWt),
  n = length(HeadWt)
)

# Paged interactive table of the result
DT::datatable(summ_cab, options = list(pageLength = 10))

Dealing with NAs

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Work on a copy of cabbages and blank out three head weights
c1 <- cabbages
c1$HeadWt[c(1, 20, 45)] <- NA
# mean() and sd() return NA for any group that contains an NA, while
# length() still counts the missing entries
ddply(
  c1, c("Cult", "Date"), summarise,
  Weight = mean(HeadWt),
  sd = sd(HeadWt),
  n = length(HeadWt)
)
  Cult Date Weight        sd  n
1  c39  d16     NA        NA 10
2  c39  d20     NA        NA 10
3  c39  d21   2.74 0.9834181 10
4  c52  d16   2.26 0.4452215 10
5  c52  d20     NA        NA 10
6  c52  d21   1.47 0.2110819 10
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

c1 <- cabbages # Make a copy
# The NAs are set on a separate copy: the "Missing combinations" example
# below subsets c1, and its printed results assume complete data.
c1_na <- c1
c1_na$HeadWt[c(1, 20, 45)] <- NA # Set some values to NA
# na.rm=TRUE makes mean() and sd() skip the NAs, and sum(!is.na(...))
# counts only the observations that are actually present
ddply(c1_na, c("Cult", "Date"), summarise,
      Weight = mean(HeadWt, na.rm = TRUE),
      sd = sd(HeadWt, na.rm = TRUE),
      n = sum(!is.na(HeadWt)))
  Cult Date   Weight        sd  n
1  c39  d16 3.255556 0.9824855  9
2  c39  d20 2.722222 0.1394433  9
3  c39  d21 2.740000 0.9834181 10
4  c52  d16 2.260000 0.4452215 10
5  c52  d20 3.044444 0.8094923  9
6  c52  d21 1.470000 0.2110819 10

Missing combinations

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Start from the cabbages copy and drop every row that is both c52 and d21
# (i.e. keep rows where the cultivar or the date differs)
c2 <- subset(c1, Cult != "c52" | Date != "d21")
# Summarise the remaining groups; the c52/d21 combination no longer
# appears in the result
c2a <- ddply(
  c2, c("Cult", "Date"), summarise,
  Weight = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt))
)

c2a
  Cult Date Weight        sd  n
1  c39  d16   3.18 0.9566144 10
2  c39  d20   2.80 0.2788867 10
3  c39  d21   2.74 0.9834181 10
4  c52  d16   2.26 0.4452215 10
5  c52  d20   3.11 0.7908505 10
# Make the graph
# c2a has no row for c52/d21, so only one cultivar is drawn at d21
ggplot(c2a, aes(x=Date, fill=Cult, y=Weight)) + geom_bar(stat="identity",position="dodge")

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# .drop=FALSE keeps cultivar/date combinations that have no rows in c2,
# so the empty c52/d21 group is reported with n = 0
c2b <- ddply(
  c2, c("Cult", "Date"),
  .drop = FALSE,
  summarise,
  Weight = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt))
)
c2b
  Cult Date Weight        sd  n
1  c39  d16   3.18 0.9566144 10
2  c39  d20   2.80 0.2788867 10
3  c39  d21   2.74 0.9834181 10
4  c52  d16   2.26 0.4452215 10
5  c52  d20   3.11 0.7908505 10
6  c52  d21    NaN        NA  0
# Make the graph
# c2b contains a c52/d21 row whose Weight is NaN, so that combination is
# present in the data but has no bar height to draw
ggplot(c2b, aes(x=Date, fill=Cult, y=Weight)) + geom_bar(stat="identity",position="dodge")

Summarizing Data with Standard Errors and Confidence Intervals

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Per cultivar/date: mean, standard deviation, non-missing count, and the
# standard error of the mean computed from the sd and n columns just created
ca <- ddply(
  cabbages, c("Cult", "Date"), summarise,
  Weight = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt)),
  se = sd / sqrt(n)
)

# Display the summary with DT
DT::datatable(ca, options = list(pageLength = 10))
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Same summary, printed to the console instead of shown in a table;
# se is the standard error of the mean, sd / sqrt(n)
ddply(
  cabbages, c("Cult", "Date"), summarise,
  Weight = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt)),
  se = sd / sqrt(n)
)
  Cult Date Weight        sd  n         se
1  c39  d16   3.18 0.9566144 10 0.30250803
2  c39  d20   2.80 0.2788867 10 0.08819171
3  c39  d21   2.74 0.9834181 10 0.31098410
4  c52  d16   2.26 0.4452215 10 0.14079141
5  c52  d20   3.11 0.7908505 10 0.25008887
6  c52  d21   1.47 0.2110819 10 0.06674995

Confidence Intervals

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# t multiplier for a two-sided 95% interval: the 97.5th percentile of the
# t distribution with n - 1 degrees of freedom, one value per row of ca
ciMult <- qt(.975, ca$n-1)
ciMult
[1] 2.262157 2.262157 2.262157 2.262157 2.262157 2.262157
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Half-width of the 95% confidence interval: standard error times the
# t multiplier computed above
ca$ci <- ca$se * ciMult
ca$ci
[1] 0.6843207 0.1995035 0.7034949 0.3184923 0.5657403 0.1509989
# We could have done this all in one line. The degrees of freedom are still
# n - 1, so the result is identical to ca$ci above:
ca$ci95 <- ca$se * qt(.975, ca$n - 1)
ca$ci95
[1] 0.6843207 0.1995035 0.7034949 0.3184923 0.5657403 0.1509989
#For a 99% confidence interval, use .995.
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

#' Summarise a measure by group: count, mean, sd, standard error and CI
#'
#' For every combination of the grouping variables this computes the number
#' of observations, the mean and the standard deviation of `measurevar`,
#' then adds the standard error of the mean (`sd / sqrt(n)`) and the
#' half-width of a t-based confidence interval
#' (`se * qt(conf.interval / 2 + .5, n - 1)`).
#'
#' @param data A data frame.
#' @param measurevar Name of the column to summarise; the mean column of the
#'   result is renamed to this name.
#' @param groupvars Grouping variables, handed to `plyr::ddply()` unchanged.
#' @param conf.interval Confidence level of the interval (default .95).
#' @param na.rm If `TRUE`, `NA`s are excluded from n, mean and sd.
#' @param .drop Handed to `plyr::ddply()`; `FALSE` keeps group combinations
#'   that do not occur in `data`.
#' @return A data frame with the grouping columns followed by `n`, the mean
#'   (named after `measurevar`), `sd`, `se` and `ci`. `ci` is `NA` for groups
#'   with fewer than two counted observations.
summarySE <- function(data=NULL, measurevar, groupvars=NULL,
                      conf.interval=.95, na.rm=FALSE, .drop=TRUE) {
  # require() only returns FALSE when the package is missing; fail loudly
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("summarySE() requires the 'plyr' package", call. = FALSE)
  }
  if (!is.data.frame(data)) {
    stop("'data' must be a data frame", call. = FALSE)
  }

  # Version of length() that can ignore NAs: if na.rm is TRUE, don't count them
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  # Per-group summary. plyr:: is spelled out because dplyr exports functions
  # with the same names (e.g. rename) and may mask them on the search path.
  datac <- plyr::ddply(data, groupvars, .drop = .drop,
    .fun = function(xx, col, na.rm) {
      values <- xx[[col]] # always a plain vector, never a one-column frame
      c(n = length2(values, na.rm = na.rm),
        mean = mean(values, na.rm = na.rm),
        sd = stats::sd(values, na.rm = na.rm))
    },
    measurevar,
    na.rm
  )

  # Rename the "mean" column to the name of the measured variable
  datac <- plyr::rename(datac, c("mean" = measurevar))
  datac$se <- datac$sd / sqrt(datac$n) # Standard error of the mean

  # t multiplier for the requested confidence level. qt() is only called
  # where the degrees of freedom are positive, which avoids a "NaNs produced"
  # warning for empty or single-observation groups; their ci stays NA.
  deg_free <- datac$n - 1
  ciMult <- rep(NA_real_, length(deg_free))
  usable <- !is.na(deg_free) & deg_free > 0
  ciMult[usable] <- stats::qt(conf.interval / 2 + .5, deg_free[usable])
  datac$ci <- datac$se * ciMult
  datac
}
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)

library(MASS) # For the data set
library(plyr)

# Drop every row that is both c52 and d21 (keep rows where either differs)
c2 <- subset(cabbages, Cult != "c52" | Date != "d21")
# Blank out three head weights
c2$HeadWt[c(1, 20, 45)] <- NA
# 99% intervals, ignoring NAs and keeping the now-empty c52/d21 group
summarySE(
  c2,
  measurevar = "HeadWt",
  groupvars = c("Cult", "Date"),
  conf.interval = .99,
  na.rm = TRUE,
  .drop = FALSE
)
  Cult Date  n   HeadWt        sd         se        ci
1  c39  d16  9 3.255556 0.9824855 0.32749517 1.0988731
2  c39  d20  9 2.722222 0.1394433 0.04648111 0.1559621
3  c39  d21 10 2.740000 0.9834181 0.31098410 1.0106472
4  c52  d16 10 2.260000 0.4452215 0.14079141 0.4575489
5  c52  d20  9 3.044444 0.8094923 0.26983077 0.9053867
6  c52  d21  0      NaN        NA         NA        NA

Converting Data from Wide to Long

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(anthoming)
head(anthoming)
  angle expt ctrl
1   -20    1    0
2   -10    7    3
3     0    2    3
4    10    0    3
5    20    0    1
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(anthoming)

library(reshape2)
# angle identifies each row; the remaining columns (expt, ctrl) are stacked
# into a "condition" column holding the old column name and a "count" column
# holding the value
melt(anthoming, id.vars="angle", variable.name="condition", value.name="count")
   angle condition count
1    -20      expt     1
2    -10      expt     7
3      0      expt     2
4     10      expt     0
5     20      expt     0
6    -20      ctrl     0
7    -10      ctrl     3
8      0      ctrl     3
9     10      ctrl     3
10    20      ctrl     1
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(drunk)
head(drunk)
     sex 0-29 30-39 40-49 50-59 60+
1   male  185   207   260   180  71
2 female    4    13    10     7  10
# Only the two listed age columns are melted; the other age columns are
# left out of the result
melt(
  drunk,
  id.vars = "sex",
  measure.vars = c("0-29", "30-39"),
  variable.name = "age",
  value.name = "count"
)
     sex   age count
1   male  0-29   185
2 female  0-29     4
3   male 30-39   207
4 female 30-39    13
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
# Load the data set that is actually inspected and melted in this example
data(plum_wide)
head(plum_wide)
  length      time dead alive
1   long   at_once   84   156
2   long in_spring  156    84
3  short   at_once  133   107
4  short in_spring  209    31
# Two ID columns (length, time); dead and alive are stacked into
# survival/count pairs
melt(
  plum_wide,
  id.vars = c("length", "time"),
  variable.name = "survival",
  value.name = "count"
)
  length      time survival count
1   long   at_once     dead    84
2   long in_spring     dead   156
3  short   at_once     dead   133
4  short in_spring     dead   209
5   long   at_once    alive   156
6   long in_spring    alive    84
7  short   at_once    alive   107
8  short in_spring    alive    31
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)

# Make a copy of the data
# (the copy gets an extra column below, leaving corneas itself untouched)
co <- corneas
co
  affected notaffected
1      488         484
2      478         478
3      480         492
4      426         444
5      440         436
6      410         398
7      458         464
8      460         476
# Add an ID column so each original row can still be identified after
# melting; seq_len() stays correct even for a zero-row data frame,
# where 1:nrow(co) would yield c(1, 0)
co$id <- seq_len(nrow(co))
melt(co, id.vars = "id", variable.name = "eye", value.name = "thickness")
   id         eye thickness
1   1    affected       488
2   2    affected       478
3   3    affected       480
4   4    affected       426
5   5    affected       440
6   6    affected       410
7   7    affected       458
8   8    affected       460
9   1 notaffected       484
10  2 notaffected       478
11  3 notaffected       492
12  4 notaffected       444
13  5 notaffected       436
14  6 notaffected       398
15  7 notaffected       464
16  8 notaffected       476

Converting Data from Long to Wide

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(plum)
head(plum)
  length      time survival count
1   long   at_once     dead    84
2   long in_spring     dead   156
3  short   at_once     dead   133
4  short in_spring     dead   209
5   long   at_once    alive   156
6   long in_spring    alive    84
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(plum)

library(reshape2)
# Formula left side (length + time) stays as ID columns; each value of
# survival becomes its own column, filled from count
dcast(plum, length + time ~ survival, value.var="count")
  length      time dead alive
1   long   at_once   84   156
2   long in_spring  156    84
3  short   at_once  133   107
4  short in_spring  209    31
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(plum)

# Only time stays as an ID column; every length/survival combination becomes
# a column named <length>_<survival>, filled from count
dcast(plum, time ~ length + survival, value.var="count")
       time long_dead long_alive short_dead short_alive
1   at_once        84        156        133         107
2 in_spring       156         84        209          31

Converting a Time Series Object to Times and Values

library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(nhtemp)
head(nhtemp)
[1] 49.9 52.3 49.4 51.1 49.4 47.9
# Split the time series into its two parts and store them as columns:
# time() gives the observation times, as.numeric() the plain values
nht <- data.frame(
  year = as.numeric(time(nhtemp)),
  temp = as.numeric(nhtemp)
)
nht
   year temp
1  1912 49.9
2  1913 52.3
3  1914 49.4
4  1915 51.1
5  1916 49.4
6  1917 47.9
7  1918 49.8
8  1919 50.9
9  1920 49.3
10 1921 51.9
11 1922 50.8
12 1923 49.6
13 1924 49.3
14 1925 50.6
15 1926 48.4
16 1927 50.7
17 1928 50.9
18 1929 50.6
19 1930 51.5
20 1931 52.8
21 1932 51.8
22 1933 51.1
23 1934 49.8
24 1935 50.2
25 1936 50.4
26 1937 51.6
27 1938 51.8
28 1939 50.9
29 1940 48.8
30 1941 51.7
31 1942 51.0
32 1943 50.6
33 1944 51.7
34 1945 51.5
35 1946 52.1
36 1947 51.3
37 1948 51.0
38 1949 54.0
39 1950 51.4
40 1951 52.7
41 1952 53.1
42 1953 54.6
43 1954 52.0
44 1955 52.0
45 1956 50.9
46 1957 52.6
47 1958 50.2
48 1959 52.6
49 1960 51.6
50 1961 51.9
51 1962 50.5
52 1963 50.9
53 1964 51.7
54 1965 51.4
55 1966 51.7
56 1967 50.8
57 1968 51.9
58 1969 51.8
59 1970 51.9
60 1971 53.0
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(presidents)
head(presidents)
[1] NA 87 82 75 63 50
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)
data(presidents)

# Quarterly series: time() yields fractional years (.00, .25, .50, .75)
pres_rating <- data.frame(
  year = as.numeric(time(presidents)),
  rating = as.numeric(presidents)
)
pres_rating
       year rating
1   1945.00     NA
2   1945.25     87
3   1945.50     82
4   1945.75     75
5   1946.00     63
6   1946.25     50
7   1946.50     43
8   1946.75     32
9   1947.00     35
10  1947.25     60
11  1947.50     54
12  1947.75     55
13  1948.00     36
14  1948.25     39
15  1948.50     NA
16  1948.75     NA
17  1949.00     69
18  1949.25     57
19  1949.50     57
20  1949.75     51
21  1950.00     45
22  1950.25     37
23  1950.50     46
24  1950.75     39
25  1951.00     36
26  1951.25     24
27  1951.50     32
28  1951.75     23
29  1952.00     25
30  1952.25     32
31  1952.50     NA
32  1952.75     32
33  1953.00     59
34  1953.25     74
35  1953.50     75
36  1953.75     60
37  1954.00     71
38  1954.25     61
39  1954.50     71
40  1954.75     57
41  1955.00     71
42  1955.25     68
43  1955.50     79
44  1955.75     73
45  1956.00     76
46  1956.25     71
47  1956.50     67
48  1956.75     75
49  1957.00     79
50  1957.25     62
51  1957.50     63
52  1957.75     57
53  1958.00     60
54  1958.25     49
55  1958.50     48
56  1958.75     52
57  1959.00     57
58  1959.25     62
59  1959.50     61
60  1959.75     66
61  1960.00     71
62  1960.25     62
63  1960.50     61
64  1960.75     57
65  1961.00     72
66  1961.25     83
67  1961.50     71
68  1961.75     78
69  1962.00     79
70  1962.25     71
71  1962.50     62
72  1962.75     74
73  1963.00     76
74  1963.25     64
75  1963.50     62
76  1963.75     57
77  1964.00     80
78  1964.25     73
79  1964.50     69
80  1964.75     69
81  1965.00     71
82  1965.25     64
83  1965.50     69
84  1965.75     62
85  1966.00     63
86  1966.25     46
87  1966.50     56
88  1966.75     44
89  1967.00     44
90  1967.25     52
91  1967.50     38
92  1967.75     46
93  1968.00     36
94  1968.25     49
95  1968.50     35
96  1968.75     44
97  1969.00     59
98  1969.25     65
99  1969.50     65
100 1969.75     56
101 1970.00     66
102 1970.25     53
103 1970.50     61
104 1970.75     52
105 1971.00     51
106 1971.25     48
107 1971.50     54
108 1971.75     49
109 1972.00     49
110 1972.25     61
111 1972.50     NA
112 1972.75     NA
113 1973.00     68
114 1973.25     44
115 1973.50     40
116 1973.75     27
117 1974.00     28
118 1974.25     25
119 1974.50     24
120 1974.75     24
library(gcookbook) # For the data set
library(ggplot2)
library(plotly)
library(dplyr)
library(gridExtra)
library(DT)
library(plyr)

# Store year and quarter in separate columns: floor() removes the fractional
# part of the time, cycle() gives the position within each year (1 to 4)
pres_rating2 <- data.frame(
  year    = as.numeric(floor(time(presidents))),
  quarter = as.numeric(cycle(presidents)),
  rating  = as.numeric(presidents)
)
pres_rating2
    year quarter rating
1   1945       1     NA
2   1945       2     87
3   1945       3     82
4   1945       4     75
5   1946       1     63
6   1946       2     50
7   1946       3     43
8   1946       4     32
9   1947       1     35
10  1947       2     60
11  1947       3     54
12  1947       4     55
13  1948       1     36
14  1948       2     39
15  1948       3     NA
16  1948       4     NA
17  1949       1     69
18  1949       2     57
19  1949       3     57
20  1949       4     51
21  1950       1     45
22  1950       2     37
23  1950       3     46
24  1950       4     39
25  1951       1     36
26  1951       2     24
27  1951       3     32
28  1951       4     23
29  1952       1     25
30  1952       2     32
31  1952       3     NA
32  1952       4     32
33  1953       1     59
34  1953       2     74
35  1953       3     75
36  1953       4     60
37  1954       1     71
38  1954       2     61
39  1954       3     71
40  1954       4     57
41  1955       1     71
42  1955       2     68
43  1955       3     79
44  1955       4     73
45  1956       1     76
46  1956       2     71
47  1956       3     67
48  1956       4     75
49  1957       1     79
50  1957       2     62
51  1957       3     63
52  1957       4     57
53  1958       1     60
54  1958       2     49
55  1958       3     48
56  1958       4     52
57  1959       1     57
58  1959       2     62
59  1959       3     61
60  1959       4     66
61  1960       1     71
62  1960       2     62
63  1960       3     61
64  1960       4     57
65  1961       1     72
66  1961       2     83
67  1961       3     71
68  1961       4     78
69  1962       1     79
70  1962       2     71
71  1962       3     62
72  1962       4     74
73  1963       1     76
74  1963       2     64
75  1963       3     62
76  1963       4     57
77  1964       1     80
78  1964       2     73
79  1964       3     69
80  1964       4     69
81  1965       1     71
82  1965       2     64
83  1965       3     69
84  1965       4     62
85  1966       1     63
86  1966       2     46
87  1966       3     56
88  1966       4     44
89  1967       1     44
90  1967       2     52
91  1967       3     38
92  1967       4     46
93  1968       1     36
94  1968       2     49
95  1968       3     35
96  1968       4     44
97  1969       1     59
98  1969       2     65
99  1969       3     65
100 1969       4     56
101 1970       1     66
102 1970       2     53
103 1970       3     61
104 1970       4     52
105 1971       1     51
106 1971       2     48
107 1971       3     54
108 1971       4     49
109 1972       1     49
110 1972       2     61
111 1972       3     NA
112 1972       4     NA
113 1973       1     68
114 1973       2     44
115 1973       3     40
116 1973       4     27
117 1974       1     28
118 1974       2     25
119 1974       3     24
120 1974       4     24